
### CONTENTS

# Load 'SUPERMUN_final' (municipal performance data) and 'indicators' (indicator descriptions) datasets 
# Simulate blinded municipality-level treatment identifiers
# Make a list of municipal performance indicators ('varlist') from the indicator descriptions
# Merge municipal performance data with municipality-level treatment identifiers (using blinded identifiers in the blind analysis)
# Merge municipal performance data with ACLED conflict data
# Define unique commune identifier and randomization block identifier
# Define the main dependent variable total_points_all (institutional capacity points + service delivery points)
# Impute missing values on individual indicators to be able to calculate total_points for all municipalities
# Generate lagged variables


##############################
## PERFORMANCE INDICATOR LIST
## Identifier columns followed by every indicator name listed in the
## indicator descriptions.
varlist <- c(
  "region",
  "commune",
  "year",
  indicators$indicator
)

##############################
## MERGE MUNICIPAL PERFORMANCE DATA WITH (BLINDED/UNBLINDED) TREATMENT IDENTIFIERS
## Left join on region + commune: every row of the performance data is kept,
## whether or not a matching treatment record exists.
SUPERMUN_final <- merge(
  x = supermun,
  y = treatment.cbo,
  by = c("region", "commune"),
  all.x = TRUE
)

##############################
## MERGE MUNICIPAL PERFORMANCE DATA WITH ACLED CONFLICT DATA

# Total fatalities per region / commune / year, computed from ACLED rows that
# have a non-missing year. The commune key is renamed so that it lines up with
# the performance data.
commune_conflict <- acled %>%
  filter(!is.na(year)) %>%
  group_by(region, commune_edited, year) %>%
  summarize(conflict_fatalities = sum(fatalities, na.rm = TRUE)) %>%
  rename(commune = commune_edited)

# Left join onto the performance data; commune-years with no matching conflict
# row come back as NA and are recoded to zero fatalities.
SUPERMUN_final <- merge(
  x = SUPERMUN_final,
  y = commune_conflict,
  by = c("region", "commune", "year"),
  all.x = TRUE
)
SUPERMUN_final$conflict_fatalities <- replace(
  SUPERMUN_final$conflict_fatalities,
  is.na(SUPERMUN_final$conflict_fatalities),
  0
)

##############################
## DEFINE GROUP IDENTIFIERS FOR FIXED EFFECTS ESTIMATION

# Commune identifier: "<region>_<commune>"
SUPERMUN_final$region_commune <- with(
  SUPERMUN_final,
  paste(region, commune, sep = "_")
)
# Randomization block identifier: "<region>_<situation>"
SUPERMUN_final$randomization_block_cbo <- with(
  SUPERMUN_final,
  paste(region, situation, sep = "_")
)

##############################
## DEFINE THE MAIN DEPENDENT VARIABLE
# Overall score = institutional capacity points + service delivery points.
# The sum is NA whenever either component is NA.
SUPERMUN_final$total_points_all <- with(
  SUPERMUN_final,
  total_points_ic + total_points_sd
)

# Tabulate, by treatment condition, the included municipalities that have a
# non-missing overall score in 2020
SUPERMUN_final %>%
  filter(
    year == 2020,
    municipality_included == 1,
    !is.na(total_points_all)
  ) %>%
  select(cboincentives) %>%
  table()

##############################
## IMPUTE MISSING TOTAL SCORES

## List the commune-years whose service delivery total is missing, then those
## whose institutional capacity total is missing, then the dataset dimensions
subset(SUPERMUN_final, is.na(total_points_sd), select = c(commune, year))
subset(SUPERMUN_final, is.na(total_points_ic), select = c(commune, year))
dim(SUPERMUN_final)

## Variable lists for service delivery (sd) and institutional capacity (ic) scores
scores.sd <- c(
  "score_passing_exam", "score_school_supplies", "score_school_wells",
  "score_school_latrines", "score_assisted_births", "score_vaccines",
  "score_csps", "score_water_access", "score_birth_certificates"
)
scores.ic <- c(
  "score_personnel", "score_meetings1", "score_meetings2", "score_attendance",
  "score_taxes_raised", "score_taxes_forecast", "score_procurement"
)

## Impute each score in `scores` and accumulate the imputed scores into a total.
##
## For every score column <s> in `scores` this adds two columns to `data`:
##   <s>_imp  : the observed value of <s>, or, where <s> is missing, the value
##              of <s> in the same commune's preceding row when ordered by year
##   <s>_lag1 : that preceding-row value of <s> (raw, not imputed)
## and it creates `total_col` as the row-wise sum of all <s>_imp columns. The
## total is NA for a row as soon as one of its imputed scores is still NA.
##
## data      : data frame with columns region_commune, year and all of `scores`
## scores    : character vector of score column names
## total_col : name of the total column to create
## Returns `data` with the new columns, ungrouped.
##
## dplyr::lag is called with its namespace (as in the lag section of this
## script) so that stats::lag can never be picked up instead.
impute_scores_from_lag <- function(data, scores, total_col) {
  data[[total_col]] <- 0
  for (s in scores) {
    imp_col <- paste0(s, "_imp")
    lag_col <- paste0(s, "_lag1")
    data <- data %>%
      group_by(region_commune) %>%
      mutate(
        # Start from the observed score; creating <s>_imp first keeps the
        # column order <s>_imp, <s>_lag1.
        !!imp_col := .data[[s]],
        !!lag_col := dplyr::lag(.data[[s]], n = 1L, order_by = year),
        # Fill the remaining gaps with the lagged value.
        !!imp_col := dplyr::coalesce(.data[[imp_col]], .data[[lag_col]])
      ) %>%
      ungroup()
    data[[total_col]] <- data[[total_col]] + data[[imp_col]]
  }
  data
}

SUPERMUN_final <- impute_scores_from_lag(
  SUPERMUN_final, scores.sd, "total_points_sd_imp"
)
SUPERMUN_final <- impute_scores_from_lag(
  SUPERMUN_final, scores.ic, "total_points_ic_imp"
)

# Overall imputed score = imputed institutional capacity + imputed service delivery
SUPERMUN_final$total_points_all_imp <-
  SUPERMUN_final$total_points_ic_imp + SUPERMUN_final$total_points_sd_imp


# Number of nonmissing observations by treatment condition
SUPERMUN_final %>%
  filter(
    year == 2020 & municipality_included == 1 & !is.na(total_points_all_imp)
  ) %>%
  select(cboincentives) %>%
  table()

##############################
## GENERATE LAGGED VARIABLES
## Every lag is taken within commune (region_commune) with rows ordered by
## year: *_lag1 is the value one row back, *_lag2 the value two rows back.

# Lagged total scores: raw overall score, then the imputed overall,
# institutional capacity and service delivery scores
SUPERMUN_final <- SUPERMUN_final %>%
  group_by(region_commune) %>%
  mutate(
    total_points_all_lag1 = dplyr::lag(total_points_all, n = 1L, order_by = year),
    total_points_all_lag2 = dplyr::lag(total_points_all, n = 2L, order_by = year),
    total_points_all_imp_lag1 = dplyr::lag(total_points_all_imp, n = 1L, order_by = year),
    total_points_all_imp_lag2 = dplyr::lag(total_points_all_imp, n = 2L, order_by = year),
    total_points_ic_imp_lag1 = dplyr::lag(total_points_ic_imp, n = 1L, order_by = year),
    total_points_ic_imp_lag2 = dplyr::lag(total_points_ic_imp, n = 2L, order_by = year),
    total_points_sd_imp_lag1 = dplyr::lag(total_points_sd_imp, n = 1L, order_by = year),
    total_points_sd_imp_lag2 = dplyr::lag(total_points_sd_imp, n = 2L, order_by = year)
  ) %>%
  ungroup()

# Generate lagged values for individual SUPERMUN performance indicators.
# Columns are addressed by name through the .data pronoun instead of building
# code strings and evaluating them.
# NOTE(review): only the first 18 entries of indicators$indicator are lagged;
# presumably these are the performance indicators proper - confirm.
for (i in indicators$indicator[1:18]) {
  lag1_col <- paste0(i, "_lag1")
  lag2_col <- paste0(i, "_lag2")
  SUPERMUN_final <- SUPERMUN_final %>%
    group_by(region_commune) %>%
    mutate(
      !!lag1_col := dplyr::lag(.data[[i]], n = 1L, order_by = year),
      !!lag2_col := dplyr::lag(.data[[i]], n = 2L, order_by = year)
    ) %>%
    ungroup()
}

# Generate lagged conflict indicator
SUPERMUN_final <- SUPERMUN_final %>%
  group_by(region_commune) %>%
  mutate(
    conflict_fatalities_lag1 = dplyr::lag(conflict_fatalities, n = 1L, order_by = year),
    conflict_fatalities_lag2 = dplyr::lag(conflict_fatalities, n = 2L, order_by = year)
  ) %>%
  ungroup()


##############################
## SAVE FINAL DATASET FOR ANALYSIS
# Serialize the data frame itself. Passing the quoted name "SUPERMUN_final"
# would write a one-element character vector to disk instead of the data.
saveRDS(SUPERMUN_final, file = here(datatype, "Final", "SUPERMUN_final.RDS"))

message("**08 completed")
